***************************************************************************************
** S-RLS Panel Full Survey **
** Data Cleaning File      **                                     
***************************************************************************************

 ** WRITTEN BY:   Bailey Palmer
 ** Date created: 2 Jun 2021

// Start from a clean session: drop any data, programs and stored results in memory
clear all
// Do not pause output with --more-- prompts
set more off
// Raise the variable limit before loading the wide survey file
set maxvar 10000


***************************************************************************************
/* START CLEANING THE S-RLS SURVEY DATA */
***************************************************************************************

// Load the de-identified, labelled panel file. The global ${dir} must be defined
// by the caller before this do-file is run (it is not set anywhere in this file).
use "${dir}/intermediate_public/Panel_Mar2020_deidentified_lab", clear  

****************************************
* Drop callback administrative vars *
****************************************
// Device metadata recorded by SurveyCTO on the callback form
local cto_meta deviceid_callback subscriberid_callback simid_callback devicephonenum_callback
drop `cto_meta'

// Before dropping the confirmation fields, verify they carry no information:
// the case id is always empty and the confirmation fields are constant or missing
assert mi(caseid_callback)
foreach confirm_var of varlist enumerator_confirm_callback q101_confirm_callback {
  assert inlist(`confirm_var',.,1111111111)
}
assert inlist(enumerator_name_confirm_callback,1,.)

// Enumerator confirmation fields and remaining CTO bookkeeping
drop caseid_callback enumerator_confirm_callback deviceid
drop enumerator_name_confirm_callback q101_confirm_callback
drop button_label_callback enumerator_name_confirm_1_callba

// Call-status housekeeping from the callback form
foreach stub in call_status call_status_final_1 alt_call_status ///
  call_status_final_2 final_call_status call_status_label ///
  formdef_version today confirm_workingday {
  drop `stub'_callback
}

// Early-termination question: confirm it is empty everywhere, then drop it
// together with its indicator versions
unab early_term : q1000b_callback q1000b_*_callback
foreach v of local early_term {
  assert mi(`v')
}
drop `early_term'

// Renumber the conclusion of the callback section (q10xx -> q25xx) for consistency.
// Exact names are renamed as a group; wildcard patterns are renamed one at a time.
rename (q1007_callback q1000a_callback) (q2507_callback q2500a_callback)
rename q1007?_callback q2507?_callback
rename q100?_callback q250?_callback
rename q1003?_callback q2503?_callback
rename (q1003a_confirm_callback q1005a_why_callback) ///
  (q2503a_confirm_callback q2505a_why_callback)


// q110b/q110c are entirely missing on the callback form, so their
// indicator versions are redundant
foreach q in q110b q110c {
  assert mi(`q'_callback)
  drop `q'_*_callback
}


****************************************
* General Housekeeping *
****************************************
// Recode every Yes/No indicator from 1=Yes/2=No to 1=Yes/0=No.
// A variable qualifies when (a) all of its values lie in {., 1, 2, -99} and
// (b) the value label that shares the variable's name maps 1 to "Yes" and 2 to "No".
// When -99 is present its existing label text is carried over to the new label.
quietly ds , has(type numeric)
local numeric_vars `r(varlist)'
foreach v of local numeric_vars {
  // skip anything holding a value other than ., 1, 2 or -99
  quietly count if !inlist(`v',.,1,2,-99)
  if r(N)!=0 {
    continue
  }

  // skip anything that is not labelled Yes/No
  local yes_text :  label  `v' 1
  local no_text :  label  `v' 2
  if "`yes_text'"!="Yes" | "`no_text'"!="No" {
    continue
  }

  quietly count if `v'==-99
  if r(N)==0 {
    // plain 1/2 indicator
    replace `v' = 0 if `v'==2 
    assert inlist(`v',.,1,0)
    lab define `v' 0 "No" 1 "Yes", replace 
    lab values `v' `v'
  }
  else {
    // indicator that also uses the -99 code: keep that code and its label
    local na_text : label `v' -99
    replace `v' = 0 if `v'==2 
    assert inlist(`v',.,1,0,-99)
    lab define `v' 0 "No" 1 "Yes" -99 "`na_text'", replace 
    lab values `v' `v'
    tab `v',mi nolab
  }
}

//DELETE UNINFORMATIVE/JUNK VARIABLES
// Device metadata, plus `today' (a duplicate interview date in unknown units)
drop subscriberid simid devicephonenum today

// caseid is never filled in, so it identifies nothing
assert mi(caseid)
drop caseid

// Enumerator confirmation fields are constants
assert enumerator_confirm == 1111111111
assert enumerator_name_confirm==1
assert enumerator_name_confirm_1==1
drop enumerator_confirm enumerator_name_confirm enumerator_name_confirm_1

// q110b and q110c are entirely missing, so their indicator versions are redundant
foreach q in q110b q110c {
  assert mi(`q')
}
foreach q in q110b q110c {
  drop `q'_? `q'__?? `q'_other
}

// Confirm the remaining variables duplicate others before dropping them
assert fr_birth_year==yr
assert fr_birth_year_callback == yr_callback
assert group==1
local pos = 0
foreach geo in country governorate district subdistrict {
  local ++pos
  local suffix : word `pos' of a b c d
  assert q301`suffix'==`geo'
}

drop yr yr_callback group country governorate district subdistrict 
****************************************
* Section 4 Cleaning: EDUCATION *
****************************************
// do_did is question-wording text that is fully determined by q401; verify, then drop
assert do_did == cond(q401==1, "do you currently attending", "did you attend") ///
  if inlist(q401,0,1)
drop do_did

// The third instance of every q40* question is empty everywhere
unab empty_third : q40*_3
foreach v of local empty_third {
  assert mi(`v')
}
drop `empty_third'


****************************************
* Section 5 Cleaning: HOUSEHOLD ROSTER *
****************************************
//Clean Household Roster Count

// Count roster members directly from the data: the roster length is the last
// slot whose q503 entry is filled and whose following slot is empty (slot 21 is the last slot)
gen roster_count = .
forvalues slot = 1/20 {
  replace roster_count = `slot' if !mi(q503_`slot') & mi(q503_`=`slot'+1')
}
replace roster_count = 21 if !mi(q503_21)

// The two survey-provided counts agree with each other...
assert q501 == household_roster_count

// ...but are wrong for one household; overwrite both with the manual count
replace household_roster_count = roster_count ///
  if id == "5943-1666" & household_roster_count != roster_count
replace q501 = roster_count if id == "5943-1666" & q501 != roster_count

// After the fix, all three counts agree; an empty roster is recorded as zero
assert roster_count==household_roster_count if !mi(roster_count)
assert roster_count==q501 if !mi(roster_count)
assert household_roster_count==0 if mi(roster_count)
assert q501==0 if mi(roster_count)

// Total household size = roster members + 1 (hh_size is defined as q501 + 1)
assert !mi(q501)
assert q501>=0
gen hh_size = q501 + 1
lab var hh_size "Total ppl in hh"
drop roster_count household_roster_count

// Household 1611-1790 has its first two roster indices shifted up by one; realign them.
// (This household was formerly identified by iid 67559947.)
foreach stub in memberid s5_index_ {
  forvalues pos = 1/2 {
    replace `stub'`pos'=`pos' if `stub'`pos'==`=`pos'+1' & id=="1611-1790"
  }
}

// Every roster slot must carry its own position as index (or be empty) and
// hold a plausible age. Also print the Arabic member text and index summaries for review.
forvalues pos = 1/21 {
  display "`pos'"
  foreach idx in memberid`pos' s5_index_`pos' {
    assert `idx'==`pos' | mi(`idx')
  }
  assert inrange(memberage`pos',0,100) if !mi(memberage`pos')
  tab member_text_ar_`pos'
  summ memberid`pos' s5_index_`pos'
}
drop member_text_en_* member_index_text_en_* member_text_ar_* member_index_text_ar_*

// Slots 22-50 are never used; confirm they are empty and remove them
forvalues pos = 22/50 {
  assert mi(memberid`pos')
  assert mi(memberage`pos')
  drop memberid`pos' memberage`pos'
}
 
// Indicator per roster member: 1 when an occupation is recorded, 0 when that
// occupation is one of the not-working codes (50, 84, 85, 60, 61), missing otherwise
forvalues pos = 1/21 {
  gen worker_roster`pos' = cond(inlist(q508_`pos',50,84,85,60,61), 0, 1) if !mi( q508_`pos')
  lab var worker_roster`pos' "HH member `pos' is employed outside the home. 0 if housewife, child, student or not working"
}

// Number of household members under 19 (clear leftover temp variables first)
cap drop __00000?
egen n_kids = rowtotal(under19_counter_*)
lab var n_kids "Number of individual <19 years old in hh"

// Gender of the household head.
// If the FR is the head (q501a==1) use the FR's gender. Otherwise use roster
// member 1, because enumerators were told to list the head first. One FR reports
// no other household members yet is not the head; that case also uses the FR's gender.
assert inlist(q501a,0,1)
gen head_male = cond(q501a==1 | hh_size ==1, q202c, q504_1)
lab var head_male "HH Head is male"

// Source coding is 1=male, 2=female; store as 1=male, 0=female
replace head_male = 0 if head_male==2
assert inlist(head_male,1,0,.)

// FR gender with the same 1/0 coding
assert inlist(q202c,1,2)
gen male = (q202c==1)
label define male 1 "Male" 0 "Female"
lab values male male
lab var male "FR is male"

//IDENTIFY CHILD RANDOMLY SELECTED FROM ROSTER FOR SDQ EXAM
/* Our intention was to randomly select a child aged 3-8 to 
take the SDQ evaluation. The CTO code randomly chose a child 3-8, but then
only administered the section to households who had a biological child 
of the FR in this age range. This is an error.
This section  confirms that the randomization was done correctly, 
and confirms the enumeration error described above.*/

//count the number of roster members whose age (q505_*) is 3-8
egen sum_eligabl_baileysversion = anycount(q505_? q505_??), values(3 4 5 6 7 8 )

//randnum_relevant_ was done correctly: a roster slot has a usable random number
//(neither missing nor -1) exactly when that member is aged 3-8
forv n=1/21 {
  assert inrange(q505_`n',3,8) if !inlist(randnum_relevant_`n',.,-1)
  assert !inlist(randnum_relevant_`n',.,-1) if inrange(q505_`n',3,8)
}
//CTO does pick the member with the largest random number
tempvar max 
egen `max' = rowmax(randnum_relevant_*)
forv n=1/21 {
  //the rank-1 member holds the row maximum (small tolerance for rounding)
  assert (randnum_relevant_`n' +.00001 ) >= `max'  if randnum_rank_`n'==1
  assert keep_position1_`n'==`n' if randnum_rank_`n'==1
  //when at least one child is eligible, the selected position and age match the rank-1 member
  assert selected_position1==`n' if randnum_rank_`n'==1 & sum_eligabl_baileysversion>0
  assert selected_child_age==q505_`n' if randnum_rank_`n'==1 & sum_eligabl_baileysversion>0
  //create flag for the child randomly selected for the SDQ test; drop extra variables
  gen child_selectedsdq_`n' = 1 if randnum_rank_`n'==1 & sum_eligabl_baileysversion>0
  lab var child_selectedsdq_`n' "Child selected for SDQ"
}
//The problem is that only hh's with a BIOLOGICAL CHILD were surveyed 18.1 and 18.2
//NOTE(review): sum_eligabl is presumably the survey software's own eligibility count,
//restricted to biological children -- confirm against the CTO form
//whenever sum_eligabl>0 the SDQ sections were filled in (household 3931-9784 is the one exception)
assert (!mi(q18_1_01) & !mi(q18_2_01_time_hh)) | id=="3931-9784" if sum_eligabl>0
//households with a 3-8 year old by our count but sum_eligabl==0 were skipped
assert mi(q18_1_01) & mi(q18_2_01_time_hh)  if sum_eligabl==0 & sum_eligabl_baileysversion>0

//why did some ineligible people still get a "selected name"/selected age?
/* people only got assigned a selected name if the sum of all the biggest indexes corresponded to
an index in the roster. If there were no eligible people, then all roster numbers were "biggest",
so the sum did not correspond to a real index, EXCEPT if there was only 1 person on the roster. */
//strip the first " 0" from selected_join and convert the remainder to a number
gen test = subinstr(selected_join," 0","",1) if sum_eligabl_baileysversion==0 
destring test, replace 
//hh_size-1 is the roster length (hh_size was built as q501 + 1)
assert test>(hh_size-1) |test==0 if mi(selected_child_age) & sum_eligabl_baileysversion==0 
assert test<=(hh_size-1) if !mi(selected_child_age) & sum_eligabl_baileysversion==0 

//replace these variables as missing when it was filled in erroneously 
//replace selected_child_name = "" if sum_eligabl_baileysversion==0 
replace selected_child_age =. if sum_eligabl_baileysversion==0 

//drop all the random variables generated to pick random SDQ
//(the child_selectedsdq_* flags created above are kept)
drop *randnum* keep_position* valid_indexes selected_position? selected_list selected_join ///
  test sum_eligabl_baileysversion

//CLEAN 2011 ROSTER
// When the birth-date field q5_2_17 holds the -99 code, the derived age variables
// contain values in the 2000-3000 range; confirm that, then blank them out
forvalues slot = 1/23 {
  foreach stub in q526_2011 q526_2020 age_diff {
    local derived `stub'_`slot'
    assert inrange(`derived',2000,3000) if q5_2_17_`slot'==-99
    replace `derived'=. if q5_2_17_`slot'==-99
  }
}

// One household has slot 10 mis-indexed as 11; correct it
replace index_2011_10= 10 if id=="5258-8821" & index_2011_10==11

// Each index now just repeats its slot number (or is empty), so it is redundant
forvalues slot = 1/23 {
  assert index_2011_`slot'==`slot' | mi(index_2011_`slot')
  drop index_2011_`slot'
}
***************************************
* Section 6: DWELLING CHARACTERISTICS *
***************************************

// q601a is only missing for respondents with breakoff_1==2 (ended the survey early)
assert  breakoff_1==2 if mi(q601a)

// q601a_null duplicates q601a
assert q601a== q601a_null

// The Arabic units label is either empty or a single fixed phrase
assert q612_units_label=="" | q612_units_label=="المجيب لا يمتلك أرض"

// Both checks passed, so neither variable adds information
drop q601a_null q612_units_label



***************************************
* Section 7: FOOD CONSUMPTION *
***************************************
//everyone was surveyed about the same sections in the same order,
//so these variables are safe to drop
forv n=1/9 {
  assert s7_1_index_`n'==`n' if !mi(s7_1_index_`n')
  drop s7_1_index_`n'  s7_1_item_ar_`n'
}

//DATA CURRENTLY ARRANGED BY N'TH CATEGORY DISCUSSED, REARRANGE DATA BY TYPE OF GOODS 
//The gift questions (q708a2_*) are stored in up to 6 slots; activity_id_sec_7_`m'
//says which of the 9 food categories slot `m' refers to. Below we build one set
//of variables per food category.

 local _1  "Cereals and cereal products"
 local _2  "Meat, etc"
 local _3  "Fish and other seafood"
 local _4  "Dairy"
 local _5  "Oils and fats"
 local _6  "Fruits and nuts"
 local _7 "Vegetables, tubers, pulses"
 local _8 "Sugar and desserts"
 local _9 "Ready-made foods, etc."

//one amount / unit / other-unit variable per food category, initially empty
forv n=1/9 {
  gen q708a_amount_`n' = .
  lab var q708a_amount_`n' "Amount consumed of `_`n'' gifted, month"
  gen q708a_unit_`n' = .
  lab var q708a_unit_`n' "Units of `_`n'' gifted, month"
  gen q708a_unit_other_`n' = ""
  lab var q708a_unit_other_`n' "Units (other) of `_`n'' gifted, month"  
}

//Slots in which nobody typed an "other" unit are imported as numeric and must be
//converted to string before they can be copied into the string variables above.
//tostring turns a numeric missing into the literal string ".", which would then be
//stored as if it were a real answer, so blank those out right after converting.
foreach v of varlist q708a2_unit_other_? {
  capture confirm string variable `v'
  if _rc {
    tostring `v', replace 
    replace `v' = "" if `v'=="."
  }
}

//copy each slot into the variables of the category it refers to
forv m=1/6 {
  forv n=1/9 {
    display "`m', `n'"
    replace q708a_amount_`n' = q708a2_`m' if activity_id_sec_7_`m'==`n'
    replace q708a_unit_`n' = q708a2_unit_`m' if activity_id_sec_7_`m'==`n'
    replace q708a_unit_other_`n' = q708a2_unit_other_`m' if activity_id_sec_7_`m'==`n' 
  }
}
//check I did this right: every category that appears in some slot has an amount
forv n=1/9 {
  assert !mi(q708a_amount_`n') if inlist(`n',activity_id_sec_7_1,activity_id_sec_7_2, ///
    activity_id_sec_7_3,activity_id_sec_7_4,activity_id_sec_7_5,activity_id_sec_7_6)
}

//the slot-ordered originals are no longer needed
drop activity_id_sec_7_? activity_name_sec_7_? q708a2_? q708a2_unit_? q708a2_unit_other_?
***************************************
* Section 8: NON-FOOD EXPENDITURE *
***************************************
//DATA CURRENTLY ARRANGED BY N'TH CATEGORY DISCUSSED, REARRANGE DATA BY TYPE OF GOODS 
// Create one gift-value variable per expenditure category; the position in the
// list below is the category number (1-9)
local cat_no = 0
foreach cat_name in "Utilities" "Water" "Infant needs" "Basic HH and hygiene items" ///
  "Debt Repayment" "Linens" "Clothing" "Basic HH items (reusable)" "School Fees" {
  local ++cat_no
  gen giftvalue_`cat_no' = .
  lab var giftvalue_`cat_no' "Value of `cat_name' gifts in past year"
}

// Answers are stored in up to 4 slots; activity_id_sec_5_`slot' names the category
// that slot refers to. Copy each slot's value into its category variable.
forvalues cat_no = 1/9 {
  forvalues slot = 1/4 {
    replace giftvalue_`cat_no' = q805b_`slot' if activity_id_sec_5_`slot'==`cat_no'
  }
}
drop activity_id_sec_5_? activity_name_sec_5_? q805b_?

***************************************
* Section 9: DURABLES *
***************************************
// Items were asked in a fixed order, so the index and item-name variables are
// redundant. Print the English item name for the log, verify the index, then drop.
forvalues item = 1/8 {
  tab s9_item_eng_`item'
  assert s9_index_`item'==`item' if !mi(s9_index_`item')
  drop s9_index_`item' s9_item_eng_`item' s9_item_ar_`item'
}

***************************************
* Section 11.1 : Self Employment *
***************************************
// Recode free-text "other" (-96) answers into existing category 6
// (wholesale or retail trade):
//   business 1 sector   - tailoring / seamstress
//   business 2 sector   - selling empty cans
//   business 2 activity - selling cans (hawking / selling items)
replace q11_1_02_1 = 6 if q11_1_02_1==-96 & ustrregexm(q11_1_02_other_1,"خياط" )
replace q11_1_02_2 = 6 if q11_1_02_2==-96 & ustrregexm(q11_1_02_other_2,"علب الفارغه")
replace q11_1_02a_2 = 6 if q11_1_02a_2==-96 & ustrregexm(q11_1_02a_other_2,"بيع العلب" )

// No "other" codes may remain; the coded versions of the free text can then go
foreach coded in q11_1_02_1 q11_1_02_2 q11_1_02a_2 {
  assert `coded'!=-96
}
drop q11_1_02_other_1_c1 q11_1_02_other_2_c1 q11_1_02a_other_2_c1

// Months each business was active: sum of the monthly indicators (1-12), forced
// to 12 when option 13 is ticked, to 0 when option 0 is ticked, and missing when
// the business itself was not reported. Later rules take precedence over earlier ones.
forvalues biz = 1/2 {
  egen months_se_active_`biz' = rowtotal(q11_1_04_?_`biz' q11_1_04_10_`biz' q11_1_04_11_`biz' q11_1_04_12_`biz')
  replace months_se_active_`biz' = cond(mi(q11_1_02_`biz'), ., ///
    cond(q11_1_04_0_`biz'==1, 0, ///
    cond(q11_1_04_13_`biz'==1, 12, months_se_active_`biz')))
  lab var months_se_active_`biz' "Months business no. `biz' was active"
}

// Number of businesses reported; missing when q11_1_01 is missing
egen num_businesses = rownonmiss(q11_1_02_1 q11_1_02_2)
replace num_businesses = . if mi(q11_1_01)
lab var num_businesses "Number of small businesses operated"
***************************************
* Section 11.2 : Employment *
***************************************


//OCCUPATION
//Recode free-text "other" (-96) occupations of job 1 into existing codes
//80  Driver (public or private) (delivery work)
replace q11_2_03_1=80 if q11_2_03_1==-96 & ustrregexm(q11_2_03_other_1,"عامل توصيل")
//  Other skilled construction work (painter) 75
replace q11_2_03_1=75 if q11_2_03_1==-96 & ustrregexm(q11_2_03_other_1,"دهين")
//  Mechanic ("tire technician") 20
replace q11_2_03_1=20 if q11_2_03_1==-96 & ustrregexm(q11_2_03_other_1,"عامل في  بناشر")

//add codes for freelancer, gas station attendent/car wash, porter (G9 codes):
//  86 Freelancer, 87 Gas station/car wash attendant, 88 Porter
forv n=1/3 {
  label define q11_2_03_`n' 86 "Freelancer" 87 "Gas station/car wash attendant" 88 "Porter", add
  lab values q11_2_03_`n' q11_2_03_`n'
}
//assign the new codes from the free text. The numeric codes must match the value
//labels defined just above: porter is 88 and gas station/car wash is 87
//(these two were previously swapped, so porters were labelled as gas station attendants
//and vice versa)
forv n=1/3 {
  quietly count if q11_2_03_`n'==-96 
  if `r(N)'!=0 {
    replace q11_2_03_`n' = 86 if q11_2_03_`n'==-96 & ustrregexm(q11_2_03_other_`n',"أعمال حره") //freelancer
    replace q11_2_03_`n' = 88 if q11_2_03_`n'==-96 & ustrregexm(q11_2_03_other_`n',"عتال") //porter
    replace q11_2_03_`n' = 87 if q11_2_03_`n'==-96 & ///
      ustrregexm(q11_2_03_other_`n',"محطه محروقات|غسيل سيارات") //gas station/car wash attendant
  }
  //every "other" answer must have been assigned a code
  assert q11_2_03_`n'!=-96
}


//INDUSTRY
// Free-text "other" (-96) industries of job 1:
//   tailoring / seamstress -> 6 (retail trade)
//   freelance work         -> new code 21 "Freelancing"
replace q11_2_04_1=6 if q11_2_04_1==-96 & ustrregexm(q11_2_04_other_1, "خياط")

label define q11_2_04_1 21 "Freelancing", add 
lab values q11_2_04_1 q11_2_04_1
replace q11_2_04_1=21 if q11_2_04_1==-96 & q11_2_04_other_1=="عمل حر"

//WORK PATTERNS
// New code 4 "Irregular" for the single free-text answer describing irregular work
label define q11_2_06_1 4 "Irregular" , add
label values q11_2_06_1 q11_2_06_1
replace q11_2_06_1 = 4 if q11_2_06_1==-96 & q11_2_06_other_1=="متفرق غير منتظم"

//!! there are some implausible work hours here: q11_2_06b_? q11_2_07_?
//   (q11_2_07_1 in particular includes some improbably high hours)


// No "other" codes may remain in the recoded variables; the coded free-text
// versions can then be dropped
assert q11_2_03_1 !=-96
assert q11_2_04_1 !=-96
assert q11_2_06_1 !=-96
assert q11_2_03_2 !=-96

drop q11_2_03_other_1_c1 q11_2_04_other_1_c1 q11_2_06_other_1_c1 q11_2_03_other_2_c1 


//Check "Another job?" variable 
// Report the inconsistencies between "another job?" (q11_2_13_k) and the presence
// of job k+1, then set "another job?" to yes wherever a further job was entered
forvalues job = 1/2 {
  local next_job = `job' +1
  count if q11_2_13_`job'==1 & mi( q11_2_03_`next_job')
  count if q11_2_13_`job'!=1 & !mi( q11_2_03_`next_job')
  replace q11_2_13_`job'=1 if !mi( q11_2_03_`next_job')
}

// Number of jobs = number of occupation fields filled in
egen num_jobs = rownonmiss(q11_2_03_?)
lab var num_jobs "No. jobs reported in last yr (possibly current? wording unclear)"

// Diagnostics only: how often the k-th job is missing, or "another job?" is yes,
// among respondents with exactly k jobs
forvalues job = 1/3 {
  count if mi( q11_2_03_`job') & num_jobs==`job'
  count if q11_2_13_`job'==1 & num_jobs==`job'
}

//WHY NOT LOOKING FOR WORK
// Extend the value label with codes 15-21 for the recurring free-text answers
label define q11_2_16 ///
   15  "Old age" ///
   16  "Refusal from family to work" ///
   17  "Waiting for job to start" ///
   18  "Social stigma" ///
   19  "Don't have a work permit" ///
   20  "Still a trainee/intern" ///
   21  "Waiting for university admission", add
lab values q11_2_16 q11_2_16

// Text of the coded "other" answers, held in a scratch variable
tempvar reason_txt
decode q11_2_16_other_c1, gen(`reason_txt')

// The answers below map, in order, onto the new codes 15, 16, ..., 21
local new_code = 15
foreach answer in "Due to being old (Senior citizen)" ///
  "Refusal from family to work" ///
  "Waiting to be appointed in a job" ///
  "Societal difficulties" ///
  "Does not have a work permit" ///
  "Still a trainee/intern" ///
  "Waiting for university admission" {
  replace q11_2_16 = `new_code' if `reason_txt' == "`answer'"
  local ++new_code
}

// Occasional workers are assigned the existing code 2 "Not interested in working"
replace q11_2_16 = 2 if `reason_txt' == "I work intermittently"

// Every "other" answer must now be coded
assert q11_2_16 != -96 
drop `reason_txt' q11_2_16_other_c1

//BARRIERS TO FINDING A JOB 
//Recode the coded free-text "other" answers (q11_2_19_other_c1) into q11_2_19,
//adding three new categories for answers that fit no existing code
label define q11_2_19 29 "No barriers" 30 "No transportation" 31 "Age discrimination", add
lab values q11_2_19 q11_2_19
//trash holds the text of the coded "other" answer; it is dropped at the end of this block
decode q11_2_19_other_c1, gen(trash)

//29 No barriers
replace q11_2_19 = 29  if trash == "There is no obstacle"
//6 Discrimination by employers: nationality and refugee status are barriers.
//These answers were previously coded 29 ("No barriers"), contradicting the
//intended category named here.
replace q11_2_19 = 6  if inlist(trash, "Nationality", "Being a refugee")
//30 No transportation
replace q11_2_19 = 30  if inlist(trash, "Not having a driving license", "Transportation")
//31 Age discrimination
replace q11_2_19 = 31  if trash == "Being at a certain age"

//every "other" answer must now be coded
assert q11_2_19 != -96 
drop trash q11_2_19_other_c1

//NEGATIVE SHOCKS
//this variable is missing and it shouldn't be, but all info is in indicators, so drop
//(the assert guards against dropping it if it ever does contain data)
assert mi(q11_2_20)
drop q11_2_20

 
//OCCUPATION IN 2011
//add codes for freelancer, gas station attendent/car wash, porter (G9 codes):
//  86 Freelancer, 87 Gas station/car wash attendant, 88 Porter
label define q11_2_22 86 "Freelancer" 87 "Gas station/car wash attendant" 88 "Porter", add
lab values q11_2_22 q11_2_22

//freelance work -> 86 Freelancer
replace q11_2_22 = 86 if q11_2_22_other=="أعمال حره"
//  Cook/ Chef/ Caterer 77 (code baker/confectioner as chef)
replace q11_2_22 = 77 if q11_2_22_other=="عامل في محل حلويات"
//porter -> 88 Porter. This was previously assigned 87, which the label defined
//above names "Gas station/car wash attendant".
replace q11_2_22 = 88 if q11_2_22_other== "عتال"

//every "other" answer must now be coded
assert q11_2_22!=-96
drop q11_2_22_other_c1

//INDUSTRY IN 2011
// New category 21 for freelancing
lab define q11_2_23 21 "Freelancing", add
lab values q11_2_23 q11_2_23

// Recode the free-text "other" industries:
//   domestic work              -> 17 Accommodation and food services
//   customs (border & customs) -> 18 Public administration
//   freelance work             -> 21 Freelancing
replace q11_2_23=17 if q11_2_23_other=="عمل منزلي"
replace q11_2_23=18 if q11_2_23_other=="الجمارك"
replace q11_2_23 = 21 if q11_2_23_other=="عمل حر"

// Every "other" answer must now be coded
assert q11_2_23!=-96
drop q11_2_23_other_c1


***************************************
* Section 12: Transfers *
***************************************

//CLEAN NUMBER OF TRANSFERS 
// The survey's own count variables are empty because no one had more than 3
// transfer relationships. Rebuild each count as the number of relationship
// slots that are filled in:
//   q1208 <- incoming transfers (q1202_?)
//   q1217 <- outgoing transfers (q1211_?)
local slot_stub_1 q1202
local slot_stub_2 q1211
local which = 0
foreach total in q1208 q1217 {
  local ++which
  assert mi(`total')
  tempvar n_filled
  egen `n_filled' = rownonmiss(`slot_stub_`which''_?)
  replace `total' = `n_filled'
}


//CLEAN COUNTRY OF ORIGIN.
/* country_cleaning builds an English country-name variable.
   Arguments (positional):
     1. coded_var   - labelled numeric country variable
     2. english_var - name of the string variable to create
     3. other_var   - string variable holding the Arabic free-text "other" country
   The new variable starts as the decoded value label of coded_var and is then
   overwritten with a manual English translation wherever other_var matches one
   of the known Arabic spellings. The program stops with an assertion error if a
   filled-in "other" text, or an "other" code (-96), ends up without a name. */
cap program drop country_cleaning 
program define country_cleaning, rclass
    args coded_var english_var other_var

    decode `coded_var' , gen(`english_var')

    // exact-match translations
    replace `english_var' = "Canada" if `other_var'=="كندا"
    replace `english_var' = "USA" if inlist(`other_var',"امريكا","اميركا")
    replace `english_var' = "Germany" if inlist(`other_var',"ألمانيا","المانيا")
    replace `english_var' = "Switzerland" if `other_var'=="سويسرا"
    replace `english_var' = "Kuwait" if `other_var'=="الكويت"
    // pattern matches (the text only needs to contain the stem)
    replace `english_var' = "Saudi Arabia" if ustrregexm(`other_var',"السعودي")
    replace `english_var' = "Sweden" if ustrregexm(`other_var',"سويد")
    replace `english_var' = "Poland" if `other_var'=="بولندا"
    replace `english_var' = "UAE" if inlist(`other_var',"الإمارات","الامارات")
    replace `english_var' = "Morocco" if `other_var'=="المغرب"
    replace `english_var' = "Qatar" if `other_var'=="قطر"
    replace `english_var' = "Turkey" if `other_var'=="تركيا"

    // nothing that was answered may be left without an English name
    assert !mi(`english_var') if !mi(`other_var')
    assert !mi(`english_var') if `coded_var' == -96
end 
 

// Incoming transfers: sender's country. Relationship 1 goes through
// country_cleaning (decode + translate the "other" text); relationship 2 is
// only decoded.
// NOTE(review): presumably relationship 2 has no "other" text to translate -- confirm
country_cleaning q1204a_1 q1204a_english_1 q1204a_other_1
decode q1204a_2, gen(q1204a_english_2)

// Outgoing transfers: recipient's country, relationships 1 and 2
country_cleaning q1213a_1 q1213a_english_1 q1213a_other_1
country_cleaning q1213a_2 q1213a_english_2 q1213a_other_2


*****************************
/* CES-D Scoring */
*****************************
/* CESD-10: ten items (q17_2_01a - q17_2_01j), each answered 1-4 and
scored 0-3, for a maximum total of 30. A total of 10 or more is
flagged as at risk for depression.
Items e and h are positively worded and are scored in reverse
(4 minus the answer); all other items are scored as the answer minus 1.
Negative answer codes get no item score. */
*****************************
foreach item in a b c d e f g h i j {
    local q q17_2_01`item'
    local is_reversed = inlist("`item'","e","h")
    if `is_reversed' {
        //happy items: 0-3 with 3 being not happy at all
        assert inrange(`q',1,4) | inlist(`q',.,-99,-97)
        gen `q'_score = 4-`q' if `q'>0
    }
    else {
        //sad items: 0-3 with 3 being depressed
        assert inrange(`q',1,4) | inlist(`q',.,-99)
        gen `q'_score = `q'-1 if `q'>0
    }
    assert inrange(`q'_score,0,3) if !mi(`q'_score)
    // flag items without a score
    gen `q'_score_mi = mi( `q'_score )
}

// Total over the scored items (missing only when no item is scored)
egen cesd10 = rowtotal(q17*_score), missing
lab var cesd10 "CESD-10 Score"
// 1 when at least one of the ten items has no score
egen cesd10_mi = rowmax(q17*_score_mi)
lab var cesd10_mi "Missing inputs to CESD-10 Score"

// Depression flag, defined only for respondents with all ten items scored
gen depressed = (cesd10 >= 10) if !mi(cesd10) & cesd10_mi==0
lab var depressed "FR has CESD-10 score >=10 out of 30"
lab define depressed 0 "FR Not Depressed" 1 "FR Depressed"
lab values depressed depressed 

// the item-level helpers are no longer needed
drop q17_2_*_score q17_2_*_score_mi

*****************************
/* CHILD STRENGTHS AND DIFFICULTIES */
*****************************
//Figure out which kid we're talking about
//q18_1_index: roster position of the child flagged child_selectedsdq_`n'==1 whose
//roster age (q505_`n') equals the recorded selected_child_age
//child_relate_toH: that child's q506_`n' value (labelled below as relationship to the FR)
gen q18_1_index = .
lab var q18_1_index "Index of child selected for SDQ"
gen child_relate_toH = .
lab var child_relate_toH "Relationship of SDQ child to FR"
forv n = 1/21 {
  replace q18_1_index = `n' if child_selectedsdq_`n' ==1 & selected_child_age==q505_`n' & !mi(selected_child_age)
  replace child_relate_toH = q506_`n' if q18_1_index == `n'
  //assert child_selectedsdq_`n'==1 if q18_1_index == `n' & iid!=12293537
  //sanity check: an index is only ever set for a flagged child (household 3931-9784 is exempted)
  assert child_selectedsdq_`n'==1 if q18_1_index == `n' & id!="3931-9784"
} 

//whenever a child was identified, the recorded age is in the eligible 3-8 range
assert inrange(selected_child_age,3,8) if !mi(q18_1_index)

//The randomization was done correctly: among households that answered the SDQ
//(q18_1_01 non-missing), the flagged child and the identified index coincide
forv n = 1/21 {
  assert q18_1_index == `n'  if child_selectedsdq_`n' ==1 & !mi(q18_1_01)
  assert child_selectedsdq_`n' ==1 if  q18_1_index == `n' & !mi(q18_1_01)
} 

//Make sure only 1 child gets child_selectedsdq_`n'==1
//(rowtotal is a scratch variable; it is dropped below)
egen rowtotal = rowtotal(child_selectedsdq_*), mi
assert inlist(rowtotal,1,.)
assert rowtotal==1 if !mi(q18_1_01)

//Why did some FR's not do the SDQ despite having an eligible child picked?
  //A: the eligible (aged 3-8) child was not their biological child
count if rowtotal==1 & mi(q18_1_01)

//households with a selected child but no SDQ never have relationship code 13 or 14
//(household 3931-9784 is the one exception)
//NOTE(review): 13/14 are presumably the son/daughter codes of q506 -- confirm against the codebook
assert !inlist(child_relate_toH,13,14) |id=="3931-9784" if rowtotal==1 & mi(q18_1_01)
drop rowtotal
//reuse the value label of q506_1 for the new relationship variable
lab values child_relate_toH q506_1

//code child's relationship to FR
//relationship codes 13, 14, 38 and 39 are treated as "FR is the child's parent"
assert !mi(child_relate_toH) if !mi(q18_1_01)
gen child_FR_codes = 0 if !mi(child_relate_toH)
//FR is child's mother
replace child_FR_codes = 1 if inlist(child_relate_toH,13,14,38,39) & male == 0 
//FR is child's father
replace child_FR_codes = 2 if inlist(child_relate_toH,13,14,38,39) & male == 1
//FR is not child's parent
replace child_FR_codes = 3 if !inlist(child_relate_toH,13,14,38,39) & !mi(child_relate_toH)
lab define child_FR_codes 1 "child's mother" 2 "child's father" 3 "not child's parent"
lab values child_FR_codes child_FR_codes
lab var child_FR_codes "Simplification of child_relate_toH"

// Items 18, 21 and 22 exist in two age-specific versions (suffix a and b).
// The "a" version is only ever answered for 3-year-olds and the two versions are
// never both answered, so they can be merged into one variable per item.
foreach item of numlist 18 21 22 {
    local age3_version q18_1_`item'a
    local other_version q18_1_`item'b

    // the two versions are mutually exclusive
    assert mi(`age3_version') | mi(`other_version')
    assert selected_child_age == 3 if !mi(`age3_version')

    // start from the "a" version and fill the gaps from the "b" version
    clonevar q18_1_`item' = `age3_version'
    replace q18_1_`item' = `other_version' if mi(q18_1_`item')

    // the merged item is missing for exactly 236 observations
    quietly count if mi(q18_1_`item')
    assert r(N)==236
}

//Separate questions by whether or not 2=true or 2=not true 
//For each SDQ subscale, <scale>_list names its five items. Where a subscale mixes
//scoring directions, <scale>_list_02 holds the items that are added as answered
//and <scale>_list_20 the items that are reverse-scored (2 minus the answer).
//These locals are used by the scoring code further down, which looks item names
//up inside the _02/_20 strings, so the names must not be changed.
local emotional_list "q18_1_03 q18_1_08 q18_1_13 q18_1_16 q18_1_24"

local conduct_list "q18_1_05 q18_1_07 q18_1_12 q18_1_18 q18_1_22"
local conduct_list_02 "q18_1_05,  q18_1_12, q18_1_18, q18_1_22"
local conduct_list_20  "q18_1_07"


local hyperactivity_list "q18_1_02 q18_1_10 q18_1_15 q18_1_21 q18_1_25"
local hyperactivity_list_02 "q18_1_02, q18_1_10, q18_1_15"
local hyperactivity_list_20 "q18_1_21, q18_1_25"

local peer_list "q18_1_06 q18_1_11 q18_1_14 q18_1_19 q18_1_23"
local peer_list_02 "q18_1_06, q18_1_19, q18_1_23"
local peer_list_20 "q18_1_11, q18_1_14"

local prosocial_list "q18_1_01 q18_1_04 q18_1_09 q18_1_17 q18_1_20"

//assert that q18_1_01 is a good representative for the whole section:
//each respondent answered either none or all 25 of the two-digit items
//(rownonmiss is a scratch variable)
egen rownonmiss = rownonmiss(q18_1_??)
assert inlist(rownonmiss,0,25)
drop rownonmiss

// SDQ subscale sums.
// Each subscale starts at 0 for respondents who answered the SDQ (q18_1_01
// non-missing) and accumulates its five items. Only answers in the 0-2 range are
// added; anything else is skipped here. Items listed in a <scale>_list_02 local
// are added as answered; items in a <scale>_list_20 local are reverse-scored
// (2 minus the answer).
// Before an item is used, its value label for 2 is checked.

//emotional problems: every item is added as answered
cap drop emotional_problems
gen emotional_problems = 0 if !mi(q18_1_01)
foreach item of local emotional_list {
    local top_label :  label  `item' 2
    assert "`top_label'" == "Certainly true"
    replace emotional_problems = emotional_problems + `item' if inrange(`item',0,2) & !mi(emotional_problems)
}

//conduct problems: mixed directions; the label may also be the bare "2"
gen conduct_problems = 0 if !mi(q18_1_01)
foreach item of local conduct_list {
    local top_label :  label  `item' 2
    display "`top_label'"
    assert inlist("`top_label'","Certainly true","2")
    if ustrregexm("`conduct_list_02'","`item'") {
        replace conduct_problems = conduct_problems + `item' if inrange(`item',0,2) & !mi(conduct_problems)
    }
    if ustrregexm("`conduct_list_20'","`item'") {
        replace conduct_problems = conduct_problems + (2-`item') if inrange(`item',0,2) & !mi(conduct_problems)
    }
}

//hyperactivity: mixed directions; the label may also be the bare "2"
gen hyperactivity_problems = 0 if !mi(q18_1_01)
foreach item of local hyperactivity_list {
    local top_label :  label  `item' 2
    assert inlist("`top_label'","Certainly true","2")
    if ustrregexm("`hyperactivity_list_02'","`item'") {
        replace hyperactivity_problems = hyperactivity_problems + `item' if inrange(`item',0,2) & !mi(hyperactivity_problems)
    }
    if ustrregexm("`hyperactivity_list_20'","`item'") {
        replace hyperactivity_problems = hyperactivity_problems + (2-`item') if inrange(`item',0,2) & !mi(hyperactivity_problems)
    }
}

//peer problems: mixed directions
gen peer_problems = 0 if !mi(q18_1_01)
foreach item of local peer_list {
    local top_label :  label  `item' 2
    assert "`top_label'" == "Certainly true"
    if ustrregexm("`peer_list_02'","`item'") {
        replace peer_problems = peer_problems + `item' if inrange(`item',0,2) & !mi(peer_problems)
    }
    if ustrregexm("`peer_list_20'","`item'") {
        replace peer_problems = peer_problems + (2-`item') if inrange(`item',0,2) & !mi(peer_problems)
    }
}

//prosocial scale: every item is added as answered
gen prosocial_problems = 0 if !mi(q18_1_01)
foreach item of local prosocial_list {
    local top_label :  label  `item' 2
    assert "`top_label'" == "Certainly true"
    replace prosocial_problems = prosocial_problems + `item' if inrange(`item',0,2) & !mi(prosocial_problems)
}

//deal with NA's
//na_<scale>: number of items in the subscale answered with -88 or -99.
//Those answers were skipped when the subscale sums were built.
foreach list in emotional conduct hyperactivity  peer  prosocial {
    egen na_`list' = anycount(``list'_list') , values(-88 -99)
    lab var na_`list' "SDQ: Numb. missings in `list' section"
}

//na_sdq: the largest number of missings in any one subscale; missing when the
//SDQ was not answered at all
egen na_sdq = rowmax(na_emotional na_conduct na_hyperactivity na_peer na_prosocial)
replace na_sdq = . if mi(q18_1_01)
lab var na_sdq "SDQ: max number of missings within a section"

/* scale up: These scores can be scaled up pro-rata if at least 3 items were completed, e.g. a score of 4
based on 3 completed items can be scaled up to a score of 7 (6.67 rounded up) for 5 items. */

//The full variable name `var'_problems is spelled out on both sides. The previous
//right-hand side used `var'_problem, which only resolved through Stata's variable
//abbreviation and fails under -set varabbrev off-.
//NOTE(review): with 3 or more missing items the sum is left unscaled here; the
//binned scores built later are restricted to at most 2 missing items.
foreach var in emotional conduct hyperactivity peer prosocial {
  replace `var'_problems = round(`var'_problems * 5/4) if na_`var'==1
  replace `var'_problems = round(`var'_problems * 5/3) if na_`var'==2
  lab var `var'_problems "Continuous score for SDQ `var' problems"
}

//total difficulties = sum of the four problem subscales (prosocial is excluded)
egen total_difficulties = rowtotal(emotional_problems conduct_problems hyperactivity_problems peer_problems), mi
lab var total_difficulties "Continuous SDQ score"

// sdq_scoring bins a continuous SDQ subscale into three bands.
// Arguments (positional):
//   1. scale      - subscale stub; reads <scale>_problems and na_<scale>,
//                   creates <scale>_score
//   2. borderline - the single score value that counts as borderline
// Result: <scale>_score = 0 below or otherwise outside the other bands,
// 1 at the borderline value, 2 from borderline+1 up to 10. It is left missing
// when the subscale sum is missing or na_<scale> is not in 0-2.
cap program drop sdq_scoring 
program define sdq_scoring, rclass
    args scale borderline
    local first_abnormal = `borderline' + 1

    // subscale sums must lie in 0-10
    assert inrange(`scale'_problems,0,10) if !mi(`scale'_problems)

    gen `scale'_score = ///
        cond(`scale'_problems==`borderline', 1, ///
        cond(inrange(`scale'_problems,`first_abnormal',10), 2, 0)) ///
        if !mi(`scale'_problems) & inrange(na_`scale',0,2)
    lab var `scale'_score "Official binned score for `scale' problems"
end 

// Bin the four problem subscales; the number is each subscale's borderline value
foreach spec in "emotional 4" "conduct 3" "hyperactivity 6" "peer 3" {
    sdq_scoring `spec'
}

// Prosocial runs the other way: high is good (6-10 normal, 5 borderline, 0-4 abnormal).
// Only defined when at most two prosocial items are missing.
gen prosocial_score = ///
    cond(inrange(prosocial_problems,6,10), 0, ///
    cond(prosocial_problems==5, 1, ///
    cond(inrange(prosocial_problems,0,4), 2, .))) ///
    if inrange(na_prosocial,0,2)
lab var prosocial_score "Official binned score for prosocial problems"

// Total difficulties: 0-13 normal, 14-16 borderline, 17-40 abnormal.
// Only defined when no subscale has more than two missing items.
gen total_dif_score = ///
    cond(inrange(total_difficulties,0,13), 0, ///
    cond(inrange(total_difficulties,14,16), 1, ///
    cond(inrange(total_difficulties,17,40), 2, .))) ///
    if inrange(na_sdq,0,2)
lab var total_dif_score "Official total SDQ binned score"

// Shared value label for all binned scores, then tabulate each for the log
lab def sdq_scores 0 "Normal" 1 "Borderline" 2 "Abnormal"
local binned emotional_score conduct_score hyperactivity_score peer_score ///
    prosocial_score total_dif_score
lab values `binned' sdq_scores
foreach score of varlist `binned' {
    tab `score'
}

// Remove leftover temporary variables and write the cleaned file
cap drop  __00000?
save "${dir}/intermediate_public/Panel_Mar2020_clean", replace  


